/*****************************************************************************
 *
 * Copyright 2019 NXP
 * All Rights Reserved
 *
 *****************************************************************************
 *
 * THIS SOFTWARE IS PROVIDED BY NXP "AS IS" AND ANY EXPRESSED OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL NXP OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 *
*****************************************************************************/

#include <stdio.h>
#include <opencv2/opencv.hpp>
#include <umat.hpp>
#include <iostream>
#include <fstream>
#include <iomanip>

#include "../../A53_gen/inc/apu_orbfast90_process_controller.hpp"
#include "apu_orbfast90_process_controller_policy.hpp"
#include "../../A53_gen/inc/apu_orbfast91_process_controller.hpp"
#include "apu_orbfast91_process_controller_policy.hpp"
#include "../../A53_gen/inc/apu_orbmatchdescriptors0_process_controller.hpp"
#include "apu_orbmatchdescriptors0_process_controller_policy.hpp"

#include "apu_process_controller_data.hpp"

using namespace cv;
using namespace std;
using namespace apugen;

#ifdef APEX2_EMULATE
#include "apu_app_config.hpp"
#include "apu_lib.hpp"
#include "apu_extras.hpp"
#include "acf_lib.hpp"
using namespace APEX2;
#else
#include <icp_data.h>
#include <apex.h>
#include <oal.h>
using namespace icp;
#endif
//Start of user code user_define
/*
 * This is the region protected from the source code emitter,
 * all the rest can be changed by the source code emitter without notice.
 */
#if !defined(APEX2_EMULATE)
#include "frame_output_v234fb.h"
#define CHNL_CNT io::IO_DATA_CH3
#endif

#include "descriptor.h"

// One detected key point: where it is, its descriptor, and two matching
// bookkeeping fields. The bookkeeping fields are only initialized here; the
// code visible in this file never reads them back.
struct Feature
{
  Point position;           // pixel location of the key point
  Descriptor descriptor;    // descriptor computed for that location
  int matchedTo = -1;       // starts at -1 (presumably "not matched yet" - confirm)
  int matchDist = INT_MAX;  // starts at the largest int (presumably "no distance yet" - confirm)

  // Deliberately user-provided rather than defaulted: position and descriptor
  // are default-initialized, the two ints take the initializers above.
  Feature()
  {
  }
};

//End of user code

/*
 * Runs the ORB feature-matching demo once, end to end:
 *   1. loads the two grayscale input images f0.png / f1.png,
 *   2. runs the OrbFast90 and OrbFast91 processes (one per image) and builds a
 *      rotated descriptor for every flagged pixel that lies at least
 *      minBorderDist pixels away from the image border,
 *   3. packs the descriptors of both images and runs OrbMatchDescriptors0,
 *   4. draws key points and matches into one side-by-side image and either
 *      pushes it to the display or writes it to "out.png".
 *
 * Returns 0 on success, or the OR-ed non-zero status of the process controller
 * that failed. Terminates the program with exit(1) when an input image is
 * empty after loading.
 */
int TEST_CALL()
{
#ifdef APEX2_EMULATE
  setvbuf(stdout, NULL, _IONBF, 0);
  setvbuf(stderr, NULL, _IONBF, 0);
  APU_App_Config::LoadConfig("./A53_inc/apu_app.cfg");
#endif

  uint8_t lNumberInInputThreshold = 30;
  // Host-side buffers are backed by vectors so that every return path
  // (including the early error returns below) releases them. The raw pointer
  // variables are kept because SetData()/GetData() are called with them.
  std::vector<uint16_t> lInConfigurationStorage(1*4);
  uint16_t* lPtrInConfiguration = lInConfigurationStorage.data();
  #ifdef APEX2_EMULATE
    auto lImage0FileName = APU_App_Config::GetValue<std::string>("INPUT_ROOT") + "f0.png";
  #else
    auto lImage0FileName = "data/common/f0.png";
  #endif
  cv::Mat lImageInImage0 = cv::imread(lImage0FileName, CV_LOAD_IMAGE_GRAYSCALE);
  #ifdef APEX2_EMULATE
    auto lImage1FileName = APU_App_Config::GetValue<std::string>("INPUT_ROOT") + "f1.png";
  #else
    auto lImage1FileName = "data/common/f1.png";
  #endif
  cv::Mat lImageInImage1 = cv::imread(lImage1FileName, CV_LOAD_IMAGE_GRAYSCALE);
  //Start of user code declare_inputs
  /*
  * This is the region protected from the source code emitter,
  * all the rest can be changed by the source code emitter without notice.
  */

  //End of user code

  cv::UMat lConnectOutputOrbFast90Output0;

  std::vector<uint8_t> lOutTemp0Storage(576*768);
  uint8_t* lPtrOutTemp0 = lOutTemp0Storage.data();

  cv::UMat lConnectOutputOrbFast91Output0;

  std::vector<uint8_t> lOutTemp1Storage(576*768);
  uint8_t* lPtrOutTemp1 = lOutTemp1Storage.data();

  std::vector<int16_t> lOutMatches0Storage(1*512);
  int16_t* lPtrOutMatches0 = lOutMatches0Storage.data();
  std::vector<int16_t> lOutMatches1Storage(1*512);
  int16_t* lPtrOutMatches1 = lOutMatches1Storage.data();

  //Start of user code declare_outputs
  /*
  * This is the region protected from the source code emitter,
  * all the rest can be changed by the source code emitter without notice.
  */
  // Capacity hint only; the vectors still grow if more key points are found.
  const int reservedPoints = 1024;

  vector<KeyPoint> filteredKeyPoints0;
  vector<KeyPoint> filteredKeyPoints1;
  filteredKeyPoints0.reserve(reservedPoints);
  filteredKeyPoints1.reserve(reservedPoints);

  vector<Feature> features0;
  vector<Feature> features1;
  features0.reserve(reservedPoints);
  features1.reserve(reservedPoints);

  vector<DMatch> matches;
  matches.reserve(reservedPoints);
  //End of user code

  //========================================================================
  // Processing part
  //========================================================================
  int32_t retVal = 0;

  APU_OrbFast90_Process_Controller OrbFast90ProcessController(0);

  APU_OrbFast91_Process_Controller OrbFast91ProcessController(0);

  APU_OrbMatchDescriptors0_Process_Controller OrbMatchDescriptors0ProcessController(0);
  //-------------------------------- apuOrbFast90ProcessController --------------------------------
  InputProcessControllerData<cv::Mat, OrbFast90InputImgConversionPolicy> lOrbFast90InputImg(576, 768, VSDK_CV_8UC1);
  InputProcessControllerData<uint8_t, OrbFast90InputThrConversionPolicy> lOrbFast90InputThr(1, 1, VSDK_CV_8UC1);
  //Start of user code input_data_OrbFast90
/*
 * This is the region protected from the source code emitter,
 * all the rest can be changed by the source code emitter without notice.
 */

  //End of user code
  lOrbFast90InputThr.SetData(lNumberInInputThreshold);

  lOrbFast90InputImg.SetData(lImageInImage0);
    // Verification
    if (lOrbFast90InputImg.GetDataPtr()->empty())
    {
      cout << "Input image " << lImage0FileName << " was not found\n";
      exit(1);
    }
  #ifdef APEX2_EMULATE
    imshow("Image0", *lOrbFast90InputImg.GetDataPtr());
  #endif

  OutputProcessControllerData<uint8_t*, OrbFast90Output1ConversionPolicy> lOrbFast90Output1(576, 768, VSDK_CV_8UC1);
  OutputProcessControllerData<cv::UMat, OrbFast90Output0ConversionPolicy> lOrbFast90Output0(576, 768, VSDK_CV_8UC1);
  //Start of user code output_data_OrbFast90
/*
 * This is the region protected from the source code emitter,
 * all the rest can be changed by the source code emitter without notice.
 */

  //End of user code
  retVal |= OrbFast90ProcessController.Init();
  retVal |= OrbFast90ProcessController.Connect(lOrbFast90InputImg.mGraphDescr, lOrbFast90InputThr.mGraphDescr, lOrbFast90Output0.mGraphDescr, lOrbFast90Output1.mGraphDescr);
  retVal |= OrbFast90ProcessController.Start();
  retVal |= OrbFast90ProcessController.Wait();

  //Start of user code OrbFast90_after_run
  /*
   * This is the region protected from the source code emitter,
   * all the rest can be changed by the source code emitter without notice.
   */

    //End of user code

  if(retVal)
  {
    cout << "OrbFast90ProcessController error\n";
    return retVal;
  }
  lOrbFast90Output1.GetData(lPtrOutTemp0);

  lOrbFast90Output0.GetData(lConnectOutputOrbFast90Output0);

  //-------------------------------- apuOrbFast91ProcessController --------------------------------
  InputProcessControllerData<cv::Mat, OrbFast91InputImgConversionPolicy> lOrbFast91InputImg(576, 768, VSDK_CV_8UC1);
  InputProcessControllerData<uint8_t, OrbFast91InputThrConversionPolicy> lOrbFast91InputThr(1, 1, VSDK_CV_8UC1);
  //Start of user code input_data_OrbFast91
/*
 * This is the region protected from the source code emitter,
 * all the rest can be changed by the source code emitter without notice.
 */
  // Both images are walked with the dimensions of the first input.
  int lSrcWidth = lOrbFast90InputImg.mGraphDescr.cols;
  int lSrcHeight = lOrbFast90InputImg.mGraphDescr.rows;
  const int minBorderDist = 30;

  // Shared by both images. Scans the interior of cornerMask (skipping a border
  // of minBorderDist pixels) and, for every non-zero pixel, computes a rotated
  // descriptor from `image` and appends one Feature and one KeyPoint.
  // Pixels are addressed as y * lSrcWidth + x, i.e. this assumes tightly
  // packed rows - TODO confirm against the buffer's actual row stride.
  auto collectFeatures = [lSrcWidth, lSrcHeight, minBorderDist](vsdk::Mat& cornerMask,
                                                                 vsdk::Mat& image,
                                                                 std::vector<Feature>& features,
                                                                 std::vector<KeyPoint>& keyPoints)
  {
    for (int y = minBorderDist; y < lSrcHeight - minBorderDist; ++y)
    {
      for (int x = minBorderDist; x < lSrcWidth - minBorderDist; ++x)
      {
        const int index = y * lSrcWidth + x;
        if (cornerMask.data[index] > 0)
        {
          Point p(x, y);
          Feature f;
          float xBase, yBase;
          GetCentroidRotationLUT(image.data, lSrcWidth, p, &xBase, &yBase);
          f.descriptor = CreateDescriptorRotated(image.data, lSrcWidth, p, xBase, yBase);
          f.position = p;
          features.push_back(f);
          keyPoints.push_back(KeyPoint(p, 1.f));
        }
      }
    }
  };

  // compute the first image's descriptors
  {
      vsdk::Mat output0_mat = lOrbFast90Output0.mGraphDescr.getMat(ACCESS_READ | OAL_USAGE_CACHED);
      vsdk::Mat output1_mat = lOrbFast90Output1.mGraphDescr.getMat(ACCESS_READ | OAL_USAGE_CACHED);
      collectFeatures(output0_mat, output1_mat, features0, filteredKeyPoints0);
  }
  //End of user code
  lOrbFast91InputThr.SetData(lNumberInInputThreshold);

  lOrbFast91InputImg.SetData(lImageInImage1);
    // Verification
    if (lOrbFast91InputImg.GetDataPtr()->empty())
    {
      cout << "Input image " << lImage1FileName << " was not found\n";
      exit(1);
    }
  #ifdef APEX2_EMULATE
    imshow("Image1", *lOrbFast91InputImg.GetDataPtr());
  #endif

  OutputProcessControllerData<uint8_t*, OrbFast91Output1ConversionPolicy> lOrbFast91Output1(576, 768, VSDK_CV_8UC1);
  OutputProcessControllerData<cv::UMat, OrbFast91Output0ConversionPolicy> lOrbFast91Output0(576, 768, VSDK_CV_8UC1);
  //Start of user code output_data_OrbFast91
/*
 * This is the region protected from the source code emitter,
 * all the rest can be changed by the source code emitter without notice.
 */

  //End of user code
  retVal |= OrbFast91ProcessController.Init();
  retVal |= OrbFast91ProcessController.Connect(lOrbFast91InputImg.mGraphDescr, lOrbFast91InputThr.mGraphDescr, lOrbFast91Output0.mGraphDescr, lOrbFast91Output1.mGraphDescr);
  retVal |= OrbFast91ProcessController.Start();
  retVal |= OrbFast91ProcessController.Wait();

  //Start of user code OrbFast91_after_run
  /*
   * This is the region protected from the source code emitter,
   * all the rest can be changed by the source code emitter without notice.
   */

    //End of user code

  if(retVal)
  {
    cout << "OrbFast91ProcessController error\n";
    return retVal;
  }
  lOrbFast91Output1.GetData(lPtrOutTemp1);

  lOrbFast91Output0.GetData(lConnectOutputOrbFast91Output0);

  //-------------------------------- apuOrbMatchDescriptors0ProcessController --------------------------------
  InputProcessControllerData<uint16_t*, OrbMatchDescriptors0InputConfigConversionPolicy> lOrbMatchDescriptors0InputConfig(1, 4, VSDK_CV_16UC1);


  InputProcessControllerData<cv::UMat, OrbMatchDescriptors0Input1ConversionPolicy> lOrbMatchDescriptors0Input1 (4, 4096, VSDK_CV_8UC1);
  InputProcessControllerData<cv::UMat, OrbMatchDescriptors0Input0ConversionPolicy> lOrbMatchDescriptors0Input0 (4, 4096, VSDK_CV_8UC1);
  //Start of user code input_data_OrbMatchDescriptors0
/*
 * This is the region protected from the source code emitter,
 * all the rest can be changed by the source code emitter without notice.
 */
  // compute the second image's descriptors
  {
      vsdk::Mat output0_mat = lOrbFast91Output0.mGraphDescr.getMat(ACCESS_READ | OAL_USAGE_CACHED);
      vsdk::Mat output1_mat = lOrbFast91Output1.mGraphDescr.getMat(ACCESS_READ | OAL_USAGE_CACHED);
      collectFeatures(output0_mat, output1_mat, features1, filteredKeyPoints1);
  }

    const unsigned int matchingDataChunkX = 128;
    const unsigned int matchingDataChunkY = 4;
    const unsigned int cuCount = 32;
    const int minDistLimit = 40;
    const int rangeTest = 40;

    // Only matchingDataChunkX * matchingDataChunkY (= 512) descriptors per
    // image are packed below, and the match outputs are declared with 512
    // entries, so the counts handed to the matcher must not exceed that.
    const size_t descriptorSlots = matchingDataChunkX * matchingDataChunkY;
    const size_t featureCount0 = features0.size() < descriptorSlots ? features0.size() : descriptorSlots;
    const size_t featureCount1 = features1.size() < descriptorSlots ? features1.size() : descriptorSlots;

    // The connection buffers are replaced by freshly allocated descriptor buffers.
    lConnectOutputOrbFast90Output0 = vsdk::UMat(matchingDataChunkY, matchingDataChunkX * cuCount, VSDK_CV_8UC1);
    lConnectOutputOrbFast91Output0 = vsdk::UMat(matchingDataChunkY, matchingDataChunkX * cuCount, VSDK_CV_8UC1);

    // NOTE(review): these mappings are requested with ACCESS_READ although the
    // code below writes through them - confirm whether ACCESS_WRITE is needed
    // for the data to reach the device.
    vsdk::Mat descriptorsData0_mat = lConnectOutputOrbFast90Output0.getMat(ACCESS_READ | OAL_USAGE_CACHED);
    vsdk::Mat descriptorsData1_mat = lConnectOutputOrbFast91Output0.getMat(ACCESS_READ | OAL_USAGE_CACHED);

    lPtrInConfiguration[0] = (uint16_t)featureCount0;
    lPtrInConfiguration[1] = (uint16_t)featureCount1;
    lPtrInConfiguration[2] = (uint16_t)minDistLimit;
    lPtrInConfiguration[3] = (uint16_t)rangeTest;

    // Writes byte j of descriptor (h * matchingDataChunkX + i) to offset
    // h * matchingDataChunkX * cuCount + j * matchingDataChunkX + i; slots
    // without a feature are filled with 0.
    // `features` is taken by non-const reference because the const-ness of
    // Descriptor::GetByte is not visible from this file.
    auto packDescriptors = [&](std::vector<Feature>& features, vsdk::Mat& packed)
    {
      for (unsigned int h = 0; h < matchingDataChunkY; ++h)
      {
        const int rowOffset = h * matchingDataChunkX * cuCount;
        for (unsigned int i = 0; i < matchingDataChunkX; ++i)
        {
          const unsigned int index = h * matchingDataChunkX + i;
          for (unsigned int j = 0; j < cuCount; ++j)
          {
            packed.at<uint8_t>(rowOffset + j * matchingDataChunkX + i) = index < features.size() ? features[index].descriptor.GetByte(j) : 0;
          }
        }
      }
    };

    packDescriptors(features0, descriptorsData0_mat);
    packDescriptors(features1, descriptorsData1_mat);
  //End of user code
  lOrbMatchDescriptors0InputConfig.SetData(lPtrInConfiguration);


  lOrbMatchDescriptors0Input1.SetData(lConnectOutputOrbFast90Output0);
  lOrbMatchDescriptors0Input0.SetData(lConnectOutputOrbFast91Output0);

  OutputProcessControllerData<int16_t*, OrbMatchDescriptors0Output0ConversionPolicy> lOrbMatchDescriptors0Output0(1, 512, VSDK_CV_16SC1);
  OutputProcessControllerData<int16_t*, OrbMatchDescriptors0Output1ConversionPolicy> lOrbMatchDescriptors0Output1(1, 512, VSDK_CV_16SC1);
  //Start of user code output_data_OrbMatchDescriptors0
/*
 * This is the region protected from the source code emitter,
 * all the rest can be changed by the source code emitter without notice.
 */

  //End of user code
  retVal |= OrbMatchDescriptors0ProcessController.Init();
  retVal |= OrbMatchDescriptors0ProcessController.Connect(lOrbMatchDescriptors0Input1.mGraphDescr, lOrbMatchDescriptors0Input0.mGraphDescr, lOrbMatchDescriptors0InputConfig.mGraphDescr, lOrbMatchDescriptors0Output0.mGraphDescr, lOrbMatchDescriptors0Output1.mGraphDescr);
  retVal |= OrbMatchDescriptors0ProcessController.Start();
  retVal |= OrbMatchDescriptors0ProcessController.Wait();

  //Start of user code OrbMatchDescriptors0_after_run
  /*
   * This is the region protected from the source code emitter,
   * all the rest can be changed by the source code emitter without notice.
   */

    //End of user code

  if(retVal)
  {
    cout << "OrbMatchDescriptors0ProcessController error\n";
    return retVal;
  }
  lOrbMatchDescriptors0Output0.GetData(lPtrOutMatches0);
  lOrbMatchDescriptors0Output1.GetData(lPtrOutMatches1);
  //Start of user code processing_part
  /*
  * This is the region protected from the source code emitter,
  * all the rest can be changed by the source code emitter without notice.
  */
  // Turn the first match output into DMatch records. Entry i0 holds the index
  // of the second-image feature matched to first-image feature i0; negative
  // entries are skipped.
  {
    vsdk::Mat matchesData0_mat = lOrbMatchDescriptors0Output0.mGraphDescr.getMat(ACCESS_READ | OAL_USAGE_CACHED);
    // Bounded by the clamped count so the read never leaves the 512-entry output.
    for (size_t i0 = 0; i0 < featureCount0; ++i0)
    {
      const int matchedTo = matchesData0_mat.at<int16_t>(static_cast<int>(i0));
      // Reject indices that do not name one of the submitted second-image
      // features; they would index past filteredKeyPoints1 when drawing.
      if (matchedTo >= 0 && static_cast<size_t>(matchedTo) < featureCount1)
      {
        DMatch m;
        m.imgIdx = 0;
        m.queryIdx = static_cast<int>(i0);
        m.trainIdx = matchedTo;
        m.distance = 0.0f;
        matches.push_back(m);
      }
    }
  }

  // Side-by-side canvas: first image on the left, second image on the right.
  cv::Mat out(lOrbFast90InputImg.mGraphDescr.rows, lOrbFast90InputImg.mGraphDescr.cols + lOrbFast91InputImg.mGraphDescr.cols, CV_8UC3);

    {
      vsdk::Mat in0_mat = lOrbFast90InputImg.mGraphDescr.getMat(ACCESS_RW | OAL_USAGE_CACHED);
      vsdk::Mat in1_mat = lOrbFast91InputImg.mGraphDescr.getMat(ACCESS_RW | OAL_USAGE_CACHED);

      // Mark every detected key point directly in the grayscale inputs.
      for (size_t i = 0; i < filteredKeyPoints0.size(); ++i)
        circle((cv::Mat)in0_mat, filteredKeyPoints0[i].pt, 2, Scalar(50, 50, 50), 2);

      for (size_t i = 0; i < filteredKeyPoints1.size(); ++i)
        circle((cv::Mat)in1_mat, filteredKeyPoints1[i].pt, 2, Scalar(50, 50, 50), 2);

      // Replicate each gray value into all three channels of the canvas.
      for (int j = 0; j < lOrbFast90InputImg.mGraphDescr.rows; ++j)
      {
        for (int i = 0; i < lOrbFast90InputImg.mGraphDescr.cols; ++i)
        {
          const unsigned char left = in0_mat.at<unsigned char>(j, i);
          const unsigned char right = in1_mat.at<unsigned char>(j, i);
          out.at<Vec3b>(j, i) = Vec3b(left, left, left);
          out.at<Vec3b>(j, i + in0_mat.cols) = Vec3b(right, right, right);
        }
      }
    }

    // Each match gets its own colour: the dominant channel rotates with sw and
    // the other two channels step by 50, wrapping modulo 256 (unsigned char).
    unsigned char color = 50;
    int sw = 0;

    for (size_t i = 0; i < matches.size(); ++i)
    {
      const int i1 = matches[i].queryIdx;
      const int i2 = matches[i].trainIdx;

      Point kp1 = filteredKeyPoints0[i1].pt;
      Point kp2 = filteredKeyPoints1[i2].pt;
      // Shift into the right half of the canvas.
      kp2.x += lOrbFast90InputImg.mGraphDescr.cols;
      Scalar col;
      switch (sw % 3)
      {
        case 0:
          col = Scalar(255, color, color);
          break;
        case 1:
          col = Scalar(color, 255, color);
          break;
        default:
          col = Scalar(color, color, 255);
          break;
      }
      circle(out, kp1, 2, col, 2);
      circle(out, kp2, 2, col, 2);
      line( out, kp1, kp2, col, 1, CV_AA);
      color += 50;
      sw += 1;
    }
  //End of user code

  //========================================================================
  // Display part
  //========================================================================


  //Start of user code display_part
  /*
  * This is the region protected from the source code emitter,
  * all the rest can be changed by the source code emitter without notice.
  */
#if !defined(APEX2_EMULATE) && !defined(__INTEGRITY__)

  // Initialize different output class for Standalone and Linux
  #ifdef __STANDALONE__
    io::FrameOutputDCU output(1280, 720,  io::IO_DATA_DEPTH_08, CHNL_CNT);
  #else
    io::FrameOutputV234Fb output(1280, 720, io::IO_DATA_DEPTH_08, CHNL_CNT);
  #endif

  // Output buffer (screen size) and its mapped version (using cv mat in order to have copyTo functions)
  vsdk::UMat output_umat = vsdk::UMat(720, 1280, VSDK_CV_8UC3);
  {
    cv::Mat    output_mat = output_umat.getMat(ACCESS_WRITE | OAL_USAGE_CACHED);
    memset(output_mat.data, 0, 720*1280*3);

    // Scale uniformly to a width of 1280 and centre the result vertically.
    cv::resize(out, out, cv::Size(0, 0), 1280.0 / out.cols, 1280.0 / out.cols, cv::INTER_NEAREST);
    out.copyTo(output_mat(cv::Rect(0, (720-out.rows)/2, out.cols, out.rows)));
  }

  output.PutFrame(output_umat);
#else
  // PNG compression level 0.
  std::vector<int> params;
  params.push_back(CV_IMWRITE_PNG_COMPRESSION);
  params.push_back(0);

  imwrite("out.png", out, params);
#endif


#ifdef APEX2_EMULATE
  imshow("matches", out);
  waitKey();
#endif
  //End of user code

  // The host-side buffers are released by their owning vectors.
  return 0;
}
